import altair as alt
import eland as ed
import json
import pandas as pd
from elasticsearch import Elasticsearch
# Read the Elasticsearch host from a local file so it is not hard-coded here.
# All whitespace is removed (not just '\n') so that a trailing '\r' from CRLF
# line endings or stray spaces/tabs cannot end up inside the host string.
with open('elasticsearch-host', 'r', encoding='utf-8') as host_file:
    es_host = ''.join(host_file.read().split())

# eland frame over the 'covid19_age_reg_01' index; head() previews it when
# this runs as a notebook cell.
ed_covid = ed.read_es(es_host, 'covid19_age_reg_01')
ed_covid.head()

# Convert the eland frame into a regular pandas DataFrame for local plotting.
pd_covid = ed.eland_to_pandas(ed_covid)
pd_covid.head()
# Columns carried over unchanged that identify a row and its actual outcome.
_CONTEXT_COLUMNS = ['location', 'date', 'total_cases']

# Feature columns; each one has a matching 'ml.feature_importance.<name>'
# column in pd_covid. The spellings ('hundreth', 'eldery') are the column
# names as they exist in the data and must not be "corrected" here.
_FEATURE_COLUMNS = [
    'life_expectancy',
    'dalys',
    'days_since_first',
    'days_since_tenth',
    'days_since_hundreth',
    'population',
    'age_young',
    'age_working',
    'age_eldery',
]

# Take an explicit copy: assigning new columns to a frame obtained by column
# selection is what triggers pandas' SettingWithCopyWarning.
pd_covid_loc = pd_covid[_CONTEXT_COLUMNS + _FEATURE_COLUMNS].copy()

# Flatten the dotted 'ml.*' result columns into short names that are easier to
# reference from chart encodings.
pd_covid_loc['prediction'] = pd_covid['ml.total_cases_prediction']
for _feature in _FEATURE_COLUMNS:
    pd_covid_loc[_feature + '_importance'] = pd_covid['ml.feature_importance.' + _feature]
# Lift Altair's row-count limit so the whole frame can be passed to the chart.
alt.data_transformers.disable_max_rows()

# Importance columns to be folded into long form: one (vector, value) pair per
# original column and row.
importance_fields = [
    'age_young_importance',
    'age_working_importance',
    'age_eldery_importance',
    'dalys_importance',
    'life_expectancy_importance',
    'population_importance',
    'days_since_first_importance',
    'days_since_tenth_importance',
    'days_since_hundreth_importance',
]

# One bar per feature showing the summed absolute importance, with the bars
# ordered by descending x value.
importance_ranking = (
    alt.Chart(pd_covid_loc)
    .transform_fold(importance_fields, as_=['vector', 'value'])
    .transform_calculate(value_abs='abs(datum.value)')
    .mark_bar()
    .encode(
        x=alt.X('sum(value_abs):Q', title=''),
        y=alt.Y('vector:O', sort='-x', title=''),
    )
    .properties(height=180)
)

# Bare expression so the chart renders as the notebook cell's output.
importance_ranking
# Lift Altair's row-count limit so the whole frame can be passed to the charts.
alt.data_transformers.disable_max_rows()

# Frame plotted by the per-feature importance scatter charts. To look at a
# single country instead, filter it first, e.g.
#   pd_covid_loc[pd_covid_loc['location'] == 'United States']
pd_covid_loc_imp = pd_covid_loc

# Bounds of the importance axis and colour scale used by
# feature_importance_chart.
domainMin, domainMax = -125000, 125000

# Width and height given to each individual scatter chart.
cWidth = cHeight = 150

# Not referenced in the visible part of this file; kept in case later cells use it.
xVec = 'days_since_tenth:Q'
def feature_importance_chart(attX, attY):
    """Build a scatter chart of a feature's importance against its value.

    The chart reads its data and layout from module-level names:
    ``pd_covid_loc_imp`` (data), ``cWidth``/``cHeight`` (size) and
    ``domainMin``/``domainMax`` (importance axis and colour bounds).

    Args:
        attX: Field holding the feature importance; used for both the x
            position and the point colour.
        attY: Field (Altair shorthand, e.g. ``'age_young:Q'``) holding the
            feature value; plotted on the y axis with SI-prefix tick labels.

    Returns:
        An interactive Altair chart of small circles with a tooltip showing
        location, actual and predicted total cases, and both plotted fields.
    """
    importance_axis_scale = alt.Scale(domain=(domainMin, domainMax))

    # The colour range has two colours, so the domain must have two stops as
    # well. With the previous three-stop domain [min, 0, max] the extra stop
    # was not honoured: the gradient effectively ran from min to 0 only, and
    # positive importances fell outside the mapped interval.
    importance_color_scale = alt.Scale(
        domain=[domainMin, domainMax],
        range=['blue', 'darkred'],
    )

    points = alt.Chart(
        pd_covid_loc_imp,
        width=cWidth,
        height=cHeight,
    ).mark_circle(size=5, opacity=1)

    return points.encode(
        alt.X(attX, scale=importance_axis_scale, title=''),
        alt.Y(attY, axis=alt.Axis(format='s'), title=''),
        color=alt.Color(attX, scale=importance_color_scale),
        tooltip=['location', 'total_cases', 'prediction', attX, attY],
    ).interactive()
# One scatter chart per feature: the '<feature>_importance' column is passed
# as the first argument and the raw '<feature>:Q' field as the second.

# Age-structure features.
age_young, age_working, age_eldery = (
    feature_importance_chart(f'{feature}_importance', f'{feature}:Q')
    for feature in ('age_young', 'age_working', 'age_eldery')
)

# dalys, life expectancy and population.
dalys, life, pop = (
    feature_importance_chart(f'{feature}_importance', f'{feature}:Q')
    for feature in ('dalys', 'life_expectancy', 'population')
)

# Days elapsed since the first / tenth / hundredth-case milestones.
day_since_first, day_since_tenth, day_since_hundreth = (
    feature_importance_chart(f'{feature}_importance', f'{feature}:Q')
    for feature in ('days_since_first', 'days_since_tenth', 'days_since_hundreth')
)

# Lay the nine charts out as a 3x3 grid: '|' places charts side by side and
# '&' stacks the resulting rows.
age_row = age_young | age_working | age_eldery
burden_row = dalys | life | pop
days_row = day_since_first | day_since_tenth | day_since_hundreth

# Bare expression so the grid renders as the notebook cell's output.
age_row & burden_row & days_row
# Lift Altair's row-count limit so the whole frame can be passed to the charts.
alt.data_transformers.disable_max_rows()

# Actual total cases over time (blue line).
chart_cases = alt.Chart(pd_covid_loc).mark_line(
    color='blue',
    opacity=0.5,
    size=2,
).encode(
    alt.X('date:T', title=''),
    alt.Y('total_cases:Q', title='', scale=alt.Scale(domain=[0, 900000])),
    tooltip=[
        alt.Tooltip('date:T', title='Date'),
        alt.Tooltip('total_cases:Q', title='Total Cases'),
    ],
)

# Model prediction over time (red line).
# NOTE(review): this y domain ([0, 10000]) differs from the [0, 900000] used
# for the actual cases above, although both lines are layered into the same
# panel below. Confirm which range is intended.
chart_cases_predicted = alt.Chart(pd_covid_loc).mark_line(
    color='red',
    opacity=0.5,
    size=2,
).encode(
    alt.X('date:T', title=''),
    alt.Y('prediction:Q', title='', scale=alt.Scale(domain=[0, 10000])),
    tooltip=[
        alt.Tooltip('date:T', title='Date'),
        # The tooltip must read the predicted value; it previously showed
        # 'total_cases' under the 'Total Cases Prediction' label.
        alt.Tooltip('prediction:Q', title='Total Cases Prediction'),
    ],
)

# Overlay both lines and repeat the layered chart once per location, five
# panels per row. Bare expression so it renders as the notebook cell's output.
(chart_cases + chart_cases_predicted).properties(
    width=100,
    height=120,
).facet(
    facet='location:N',
    columns=5,
)